{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Evidently Test Presets"
   ],
   "metadata": {},
   "id": "4686447fe36c2254"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Install Evidently only if it cannot be imported in the current kernel.\n",
    "# NOTE(review): the URL below has no tag or commit, so it installs the repository's default branch;\n",
    "# consider pinning a release known to provide the `evidently.test_suite` / `evidently.test_preset` modules used in this notebook.\n",
    "try:\n",
    "    import evidently\n",
    "except ImportError:\n",
    "    # %pip installs into the environment of the running kernel.\n",
    "    %pip install git+https://github.com/evidentlyai/evidently.git"
   ],
   "outputs": [],
   "metadata": {},
   "id": "2240ee227cc03dd7"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Prepare Datasets"
   ],
   "metadata": {},
   "id": "4ac5e9f30f468cc4"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn import datasets, ensemble, model_selection\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "from evidently.test_suite import TestSuite\n",
    "\n",
    "from evidently.test_preset import NoTargetPerformanceTestPreset\n",
    "from evidently.test_preset import DataQualityTestPreset\n",
    "from evidently.test_preset import DataStabilityTestPreset\n",
    "from evidently.test_preset import DataDriftTestPreset\n",
    "from evidently.test_preset import RegressionTestPreset\n",
    "from evidently.test_preset import MulticlassClassificationTestPreset\n",
    "from evidently.test_preset import BinaryClassificationTopKTestPreset\n",
    "from evidently.test_preset import BinaryClassificationTestPreset"
   ],
   "outputs": [],
   "metadata": {},
   "id": "489c20fccfa47c9"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Dataset for Data Quality and Integrity\n",
    "adult_data = datasets.fetch_openml(name='adult', version=2, as_frame=True)\n",
    "adult = adult_data.frame\n",
    "\n",
    "# Split by education level: rows with the three listed levels form the current data,\n",
    "# all remaining rows form the reference data.\n",
    "is_current = adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])\n",
    "adult_ref = adult[~is_current].copy()\n",
    "adult_cur = adult[is_current].copy()\n",
    "\n",
    "# Put NaN into columns 3-4 of the first 2000 current rows so the current data has missing values.\n",
    "# Both splits are explicit copies, so this assignment does not raise SettingWithCopyWarning.\n",
    "adult_cur.iloc[:2000, 3:5] = np.nan"
   ],
   "outputs": [],
   "metadata": {},
   "id": "8ca07fbb5163dc25"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Dataset for Regression\n",
    "housing_data = datasets.fetch_california_housing(as_frame=True)\n",
    "# Non-mutating rename: the frame stored on the fetched Bunch keeps its original column names.\n",
    "housing = housing_data.frame.rename(columns={'MedHouseVal': 'target'})\n",
    "\n",
    "# Synthetic predictions: the true target plus Gaussian noise (mean 0, std 3).\n",
    "# The generator is seeded so a fresh kernel reproduces the same values.\n",
    "noise = np.random.default_rng(42).normal(0, 3, housing.shape[0])\n",
    "housing['prediction'] = housing_data['target'].values + noise\n",
    "\n",
    "# Different seeds keep the reference and current samples distinct while staying reproducible.\n",
    "housing_ref = housing.sample(n=5000, replace=False, random_state=0)\n",
    "housing_cur = housing.sample(n=5000, replace=False, random_state=1)"
   ],
   "outputs": [],
   "metadata": {},
   "id": "7498ffdfde2b5ab8"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Dataset for Binary Probabilistic Classification\n",
    "bcancer_data = datasets.load_breast_cancer(as_frame=True)\n",
    "bcancer = bcancer_data.frame\n",
    "bcancer_features = bcancer_data.feature_names.tolist()\n",
    "\n",
    "# Seeded samples (different seeds) so the reference/current split is the same on every run.\n",
    "bcancer_ref = bcancer.sample(n=300, replace=False, random_state=0)\n",
    "bcancer_cur = bcancer.sample(n=200, replace=False, random_state=1)\n",
    "\n",
    "# Independent copies that receive predicted labels instead of probabilities.\n",
    "bcancer_label_ref = bcancer_ref.copy(deep=True)\n",
    "bcancer_label_cur = bcancer_cur.copy(deep=True)\n",
    "\n",
    "# The classifier is fitted on the reference sample only.\n",
    "model = ensemble.RandomForestClassifier(random_state=1, n_estimators=10)\n",
    "model.fit(bcancer_ref[bcancer_features], bcancer_ref.target)\n",
    "\n",
    "# Probabilistic variant: 'prediction' holds column 1 of predict_proba.\n",
    "bcancer_ref['prediction'] = model.predict_proba(bcancer_ref[bcancer_features])[:, 1]\n",
    "bcancer_cur['prediction'] = model.predict_proba(bcancer_cur[bcancer_features])[:, 1]\n",
    "\n",
    "# Label variant: 'prediction' holds the output of predict.\n",
    "bcancer_label_ref['prediction'] = model.predict(bcancer_label_ref[bcancer_features])\n",
    "bcancer_label_cur['prediction'] = model.predict(bcancer_label_cur[bcancer_features])"
   ],
   "outputs": [],
   "metadata": {},
   "id": "503c2f96a808e978"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Dataset for Multiclass Classification\n",
    "iris_data = datasets.load_iris(as_frame=True)\n",
    "iris = iris_data.frame\n",
    "iris_features = iris_data.feature_names\n",
    "\n",
    "# Seeded samples (different seeds) so the reference/current split is the same on every run.\n",
    "iris_ref = iris.sample(n=75, replace=False, random_state=0)\n",
    "iris_cur = iris.sample(n=75, replace=False, random_state=1)\n",
    "\n",
    "# The classifier is fitted on the reference sample only.\n",
    "model = ensemble.RandomForestClassifier(random_state=1, n_estimators=3)\n",
    "model.fit(iris_ref[iris_features], iris_ref.target)\n",
    "\n",
    "# 'prediction' holds the predicted class label for each row.\n",
    "iris_ref['prediction'] = model.predict(iris_ref[iris_features])\n",
    "iris_cur['prediction'] = model.predict(iris_cur[iris_features])"
   ],
   "outputs": [],
   "metadata": {},
   "id": "51e775fed2d7a7fe"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## How to use Test Presets?"
   ],
   "metadata": {},
   "id": "4c29130437a1cb61"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Build a suite from the data stability preset, run it on the adult split, and display it.\n",
    "data_stability = TestSuite(tests=[DataStabilityTestPreset()])\n",
    "data_stability.run(current_data=adult_cur, reference_data=adult_ref)\n",
    "\n",
    "data_stability"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": true
   },
   "id": "987059ebf500a46d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Test suite results in JSON format\n",
    "data_stability.json()"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": false
   },
   "id": "bf375c6c30172f50"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Test suite results as a Python dictionary\n",
    "data_stability.as_dict()"
   ],
   "outputs": [],
   "metadata": {},
   "id": "beae36b35230eeea"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Data quality preset on the adult reference/current split.\n",
    "data_quality = TestSuite(tests=[DataQualityTestPreset()])\n",
    "data_quality.run(current_data=adult_cur, reference_data=adult_ref)\n",
    "\n",
    "data_quality"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": true
   },
   "id": "4efce74742c6f0ff"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Data drift preset on the adult split, with the stattest argument set to 'psi'.\n",
    "data_drift = TestSuite(tests=[DataDriftTestPreset(stattest='psi')])\n",
    "data_drift.run(current_data=adult_cur, reference_data=adult_ref)\n",
    "\n",
    "data_drift"
   ],
   "outputs": [],
   "metadata": {},
   "id": "22b4310189d9b957"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# No-target performance preset restricted to two adult columns,\n",
    "# with separate stattest settings for numerical and categorical features.\n",
    "no_target_performance = TestSuite(tests=[\n",
    "    NoTargetPerformanceTestPreset(\n",
    "        columns=['education-num', 'hours-per-week'],\n",
    "        num_stattest='ks',\n",
    "        cat_stattest='psi',\n",
    "    ),\n",
    "])\n",
    "no_target_performance.run(current_data=adult_cur, reference_data=adult_ref)\n",
    "\n",
    "no_target_performance"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": false
   },
   "id": "cbc6dfc5e3a01766"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Regression preset on the housing samples ('target' and 'prediction' columns were prepared above).\n",
    "regression_performance = TestSuite(tests=[RegressionTestPreset()])\n",
    "regression_performance.run(current_data=housing_cur, reference_data=housing_ref)\n",
    "\n",
    "regression_performance"
   ],
   "outputs": [],
   "metadata": {},
   "id": "43f4bde68aa76fa7"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Multiclass classification preset on the iris samples, with the stattest argument set to 'psi'.\n",
    "classification_performance = TestSuite(tests=[MulticlassClassificationTestPreset(stattest='psi')])\n",
    "classification_performance.run(current_data=iris_cur, reference_data=iris_ref)\n",
    "\n",
    "classification_performance"
   ],
   "outputs": [],
   "metadata": {},
   "id": "8a5ecb0cce1b1ba8"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Binary classification preset on the frames whose 'prediction' column holds predicted labels.\n",
    "label_binary_classification_performance = TestSuite(tests=[BinaryClassificationTestPreset()])\n",
    "label_binary_classification_performance.run(\n",
    "    current_data=bcancer_label_cur,\n",
    "    reference_data=bcancer_label_ref,\n",
    ")\n",
    "\n",
    "label_binary_classification_performance"
   ],
   "outputs": [],
   "metadata": {},
   "id": "e5562205b472df67"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Binary classification preset on the frames whose 'prediction' column holds probabilities,\n",
    "# with an explicit stattest and probability threshold.\n",
    "prob_binary_classification_performance = TestSuite(tests=[\n",
    "    BinaryClassificationTestPreset(\n",
    "        stattest='psi',\n",
    "        probas_threshold=0.89,\n",
    "    ),\n",
    "])\n",
    "prob_binary_classification_performance.run(current_data=bcancer_cur, reference_data=bcancer_ref)\n",
    "\n",
    "prob_binary_classification_performance"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": false
   },
   "id": "e34831b25cc4ea45"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Top-K binary classification preset (k=10) on the frames whose 'prediction' column holds probabilities.\n",
    "binary_topK_classification_performance = TestSuite(tests=[\n",
    "    BinaryClassificationTopKTestPreset(\n",
    "        k=10,\n",
    "        stattest='psi',\n",
    "    ),\n",
    "])\n",
    "binary_topK_classification_performance.run(current_data=bcancer_cur, reference_data=bcancer_ref)\n",
    "\n",
    "binary_topK_classification_performance"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": false
   },
   "id": "2e820f235da0a257"
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Support Evidently\n",
    "Did you find the example useful? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ],
   "metadata": {},
   "id": "49f84aca224d2a9b"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
